1 Setup

suppressPackageStartupMessages({
  library(here)
  library(hyperSpec)
  library(gplots)
  library(ggplot2)
  library(magrittr)
  library(tidyverse)
})

#silence chunks
knitr::opts_chunk$set(message = FALSE, warning = FALSE)

2 Data preparation

We use a combination of the Sequence Description and Gene Ontology (GO) terms to fetch cell-wall-related gene candidates

# Annotation
annotation <- read_tsv(here("data/b2g/blast2go_20190117_export.txt.gz"),show_col_types=FALSE) %>% 
  mutate(`Sequence Name`=sub("\\.p\\d+$","",`Sequence Name`))

# GO dictionary
go_annot <- annotation %>% select(`Annotation GO ID`,`Annotation GO Term`,`Annotation GO Category`) %>% 
  rename(ID=`Annotation GO ID`,Term=`Annotation GO Term`,Category=`Annotation GO Category`) %>% 
  filter(!is.na(ID)) 

go_annot <- tibble(ID=unlist(str_split(go_annot$ID,"\\|"),use.names=FALSE),
                   Term=unlist(str_split(go_annot$Term,"\\|"),use.names=FALSE),
                   Category=unlist(str_split(go_annot$Category,"\\|"),use.names=FALSE)) %>% distinct()
# Expression
expression <- read_tsv(here("data/analysis/salmon/variance-stabilised_model-aware_gene-expression_data.tsv"),
                       show_col_types=FALSE)
# Reverse sample swap
expression %<>% rename(B2_2_24hrs_D="B2_2_12hrs_D",B2_2_12hrs_D="B2_2_24hrs_D")

# Gene of interest
goi <- unlist(annotation[grepl("cell wall",annotation$`Sequence Description`) | 
                           grepl("cell wall",annotation$`Annotation GO Term`),"Sequence Name"],use.names=FALSE)

#' replace (\\.p\\d+$) .p followed by any number of digits. The $ the sign that indicates the end of the text
goi <- sub("\\.p\\d+$","",goi)

#' sanity
stopifnot(length(goi)==length(unique(goi)))

message(sprintf("There are %s candidates",length(goi)))

#' * samples information
samples <-read_tsv(here("doc/Samples-swap-corrected.tsv"),show_col_types = FALSE)

#' ## Expression
vst <- expression %>% filter(rowname %in% goi) %>% column_to_rownames() %>% as.matrix()
sel <- rowSums(vst) > 0
vst <- vst[sel,]

samples <- samples[match(colnames(vst),samples$SampleID),]

# Export
# write_tsv(vst %>% as_tibble() %>% rownames_to_column("ID"),
#           here("data/analysis/cell-wall/goi-vst-expression_with-geneID.tsv"))

3 Correlation

3.1 calculation

Next, we look at the correlation between cell wall thickness and change in expression (mean of 4 replicates). We keep only the time points that are present in both datasets:

1h, 4hs, 12hs, 24hs, 72hs, 120hs

#cell wall
cw <- read_tsv(here("doc/Cell-wall-measurement.tsv"))
cw %<>% filter(!time_in_hours %in% c(48,240))
cw_ctrl <- cw %>% filter(treatment %in% "control")
cw_cold <- cw %>% filter(treatment %in% "cold")

colnames(vst) %<>% gsub(pattern = "B2_2_", replacement = "")
colnames(vst) %<>%  gsub(pattern = "60min", replacement = "1hrs_")
vst %<>% as.data.frame()
vst$gene <- rownames(vst)

#calculating the average expression of reps
vst_avg <- vst %>% pivot_longer(cols = -gene, names_to = c("time", "rep"), names_sep = "_") %>%
  group_by(time, gene) %>% 
  summarize(meanExp = mean(value)) %>% 
  pivot_wider(names_from = time, values_from = meanExp)

#reorder the columns based on the cw data
vst_avg <- vst_avg[, c(1, match(paste0(cw_ctrl$time_in_hours, "hrs"), colnames(vst_avg)))]

#correlation
vst_avg %<>% 
  rowwise() %>% 
  mutate(ctrl_cor = cor(c_across(`1hrs`:`120hrs`),
                        cw_ctrl$width_in_um %>% as.vector(),
                        method = "pearson"),
         cold_cor = cor(c_across(`1hrs`:`120hrs`),
                        cw_cold$width_in_um %>% as.vector(),
                        method = "pearson"))

3.2 plotting

Below we look at the distribution of the correlation values for cell wall genes.

# Reshape data for ggplot
df <- vst_avg %>% 
  select(ctrl_cor,cold_cor) %>% 
  pivot_longer(cols = everything()) %>% 
  mutate(Color = ifelse(value >= 0.7, "red",
                        ifelse(value <= -0.7, "blue", "gray")))
# Create the plot
ggplot(data = df, aes(x = name, y = value)) +
  geom_violin() +
  geom_jitter(aes(color = Color), width = 0.2) +
  scale_color_manual(values = c("blue", "grey", "red")) +
  theme(legend.position = "none", text = element_text(size = 13)) +
  labs(x = "", y = "Correlation values") + 
  geom_hline(yintercept = 0.7, linetype = "dashed", color = "red") +
  geom_hline(yintercept = -0.7, linetype = "dashed", color = "blue")

#frequencies
df %<>% 
  mutate(
    range = factor(case_when(
      (value >= -1 & value < -0.9) ~ "-1",
      (value >= -0.9 & value < -0.8) ~ "-0.9",
      (value >= -0.8 & value < -0.7) ~ "-0.8",
      (value >= -0.7 & value < -0.6) ~ "-0.7",
      (value >= -0.6 & value < -0.5) ~ "-0.6",
      (value >= -0.5 & value < -0.4) ~ "-0.5",
      (value >= -0.4 & value < -0.3) ~ "-0.4",
      (value >= -0.3 & value < -0.2) ~ "-0.3",
      (value >= -0.2 & value < -0.1) ~ "-0.2",
      (value >= -0.1 & value < 0) ~ "-0.1",
      (value >= 0 & value < 0.1) ~ "0.1",
      (value >= 0.1 & value < 0.2) ~ "0.2",
      (value >= 0.2 & value < 0.3) ~ "0.3",
      (value >= 0.3 & value < 0.4) ~ "0.4",
      (value >= 0.4 & value < 0.5) ~ "0.5",
      (value >= 0.5 & value < 0.6) ~ "0.6",
      (value >= 0.6 & value < 0.7) ~ "0.7",
      (value >= 0.7 & value < 0.8) ~ "0.8",
      (value >= 0.8 & value < 0.9) ~ "0.9",
      (value >= 0.9 & value <= 1) ~ "1",
      TRUE ~ NA_character_), 
      levels = c( "-1", "-0.9", "-0.8", "-0.7", "-0.6", "-0.5", "-0.4", "-0.3", "-0.2", "-0.1",
                  "0.1", "0.2", "0.3", "0.4", "0.5", "0.6", "0.7", "0.8", "0.9", "1"))) %>% 
  group_by(range, name) %>%
  summarise(freq = n()) %>%
  ungroup() %>% 
  mutate(barcol = case_when(name == "ctrl_cor" ~ "forestgreen", 
                            name == "cold_cor" ~"deepskyblue"))
df %>% 
  ggplot(aes(y = ifelse(test = name == "cold_cor", yes = -freq, no = freq), 
             x = range, fill = name)) +
  geom_col() +
  scale_y_continuous() + 
  scale_fill_manual(values = unique(df$barcol)) + 
  labs(x = "Correlation", y = "Number of genes") + 
  theme(text = element_text(size = 12))

3.3 mfuzz clusters

Next, we identify the mfuzz clusters associated with our highly correlated genes.

#genes highly correlated with widths change in cold
goi_cold_up <- vst_avg %>% filter(cold_cor >= 0.7) %>% .$gene
goi_cold_dn <- vst_avg %>% filter(cold_cor <= -0.7) %>% .$gene


goi_ctrl_up <- vst_avg %>% filter(ctrl_cor >= 0.7) %>% .$gene
goi_ctrl_dn <- vst_avg %>% filter(ctrl_cor <= -0.7) %>% .$gene

#mfuzz clusters
clusts <- read_csv(here("data/mfuzz/clustermembership.csv"))
clusts <- clusts[,-1]

clusts %>% filter(NAME %in% goi_cold_up) %>% .$CLUSTER %>% table()
## .
##  2  3  5  8 
## 13 10  1  2
clusts %>% filter(NAME %in% goi_cold_dn) %>% .$CLUSTER %>% table()
## .
## 1 4 7 8 
## 2 2 1 1
clusts %>% filter(NAME %in% goi_ctrl_up) %>% .$CLUSTER %>% table()
## .
## 2 3 8 
## 8 3 2
clusts %>% filter(NAME %in% goi_ctrl_dn) %>% .$CLUSTER %>% table()
## .
## 4 
## 1

4 Session Info

Session Info
sessionInfo()
## R version 4.2.2 (2022-10-31)
## Platform: x86_64-pc-linux-gnu (64-bit)
## Running under: Ubuntu 18.04.6 LTS
## 
## Matrix products: default
## BLAS/LAPACK: /usr/lib/x86_64-linux-gnu/libopenblasp-r0.2.20.so
## 
## locale:
##  [1] LC_CTYPE=en_US.UTF-8       LC_NUMERIC=C              
##  [3] LC_TIME=en_US.UTF-8        LC_COLLATE=en_US.UTF-8    
##  [5] LC_MONETARY=en_US.UTF-8    LC_MESSAGES=en_US.UTF-8   
##  [7] LC_PAPER=en_US.UTF-8       LC_NAME=C                 
##  [9] LC_ADDRESS=C               LC_TELEPHONE=C            
## [11] LC_MEASUREMENT=en_US.UTF-8 LC_IDENTIFICATION=C       
## 
## attached base packages:
## [1] grid      stats     graphics  grDevices utils     datasets  methods  
## [8] base     
## 
## other attached packages:
##  [1] lubridate_1.9.2   forcats_1.0.0     stringr_1.5.0     dplyr_1.1.1      
##  [5] purrr_1.0.1       readr_2.1.4       tidyr_1.3.0       tibble_3.2.1     
##  [9] tidyverse_2.0.0   magrittr_2.0.3    gplots_3.1.3      hyperSpec_0.100.0
## [13] xml2_1.3.4        ggplot2_3.4.2     lattice_0.20-45   here_1.0.1       
## 
## loaded via a namespace (and not attached):
##  [1] Rcpp_1.0.10         deldir_1.0-6        png_0.1-8          
##  [4] gtools_3.9.4        rprojroot_2.0.3     digest_0.6.31      
##  [7] utf8_1.2.3          R6_2.5.1            evaluate_0.21      
## [10] highr_0.10          pillar_1.9.0        rlang_1.1.0        
## [13] lazyeval_0.2.2      rstudioapi_0.14     jquerylib_0.1.4    
## [16] rmarkdown_2.21      labeling_0.4.2      bit_4.0.5          
## [19] munsell_0.5.0       compiler_4.2.0      xfun_0.38          
## [22] pkgconfig_2.0.3     htmltools_0.5.5     tidyselect_1.2.0   
## [25] fansi_1.0.4         crayon_1.5.2        tzdb_0.3.0         
## [28] withr_2.5.0         bitops_1.0-7        brio_1.1.3         
## [31] jsonlite_1.8.4      gtable_0.3.3        lifecycle_1.0.3    
## [34] DBI_1.1.3           scales_1.2.1        KernSmooth_2.23-20 
## [37] cli_3.6.1           stringi_1.7.12      vroom_1.6.1        
## [40] cachem_1.0.8        farver_2.1.1        testthat_3.1.7     
## [43] latticeExtra_0.6-30 bslib_0.4.2         generics_0.1.3     
## [46] vctrs_0.6.1         RColorBrewer_1.1-3  tools_4.2.0        
## [49] interp_1.1-4        bit64_4.0.5         glue_1.6.2         
## [52] hms_1.1.3           jpeg_0.1-10         parallel_4.2.0     
## [55] fastmap_1.1.1       yaml_2.3.7          timechange_0.2.0   
## [58] colorspace_2.1-0    caTools_1.18.2      knitr_1.42         
## [61] sass_0.4.6

 

 

drawing

Created by Aman Zare

aman.zare@umu.se